num_teams <- 20
# Dicionario para relacionar o id de um time com seu nome
team_names <- c("Dragões do Sertão", "Atlético Rio Vermelho", "Borborema",
"Guerreiros da Mata", "Cacique", "Aurora Litorânea",
"Gávea Azul", "Mandacaru United", "Capibaribe",
"Índios Tupiniquins", "Atlético Taquara Verde", "Seriema",
"Blumenau City", "Iguaçu", "Atlético Palmares",
"Serra Dourada", "Sambaqui", "Pampa",
"Riacho do Meio", "Sport Club Xingu")
games <- data.frame(
h = rep(1:num_teams, each = num_teams),
a = rep(1:num_teams, times = num_teams)
)
games <- games[games$h != games$a, ]Modelo Poisson de Mistura
Gerando os jogos
Definindo os parâmetros dos dados gerados:
set.seed(28)
beta_0 <- -0.1
home_effect <- 0.35
sd_att <- c(0.2, 0.01, 0.2)
sd_def <- c(0.2, 0.01, 0.2)
pi_att <- c(0.45, 0.1, 0.45)
pi_def <- c(0.4, 0.2, 0.4)
mu_att <- c(-0.4, 0, 0.4)
mu_def <- c(0.5, 0, -0.5)
att_category <- sample(1:3, 20, replace = T, prob = pi_att)
def_category <- sample(1:3, 20, replace = T, prob = pi_def)
att_effects <- rnorm(num_teams, mu_att[att_category], sd_att)
def_effects <- rnorm(num_teams, mu_def[def_category], sd_def)Esses foram os efeitos de ataque e defesa gerados:
Gerando os resultados dos jogos
set.seed(40)
simulate_games <- function(games){
num_games <- length(games$h)
home_team <- games$h
away_team <- games$a
theta_1 <- beta_0 + home_effect + att_effects[home_team] + def_effects[away_team]
theta_2 <- beta_0 + att_effects[away_team] + def_effects[home_team]
y1 <- rpois(num_games, exp(theta_1))
y2 <- rpois(num_games, exp(theta_2))
games$y1 <- y1
games$y2 <- y2
return(games)
}
games <- simulate_games(games)
num_games <- nrow(games)Estimando os parâmetros com o STAN
data <- c(G = num_games, T = num_teams, as.list(games))
model <- stan_model("./model/poisson_mistura_simple.stan")
iter <- 10000
fit <- sampling(model, data = data, iter = iter, chains = 2, cores = 2)Warning: There were 9316 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 2.13, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
data {
int<lower=1> G; // Numero total de jogos
int<lower=1> T; // Numero de total times
int<lower=0, upper=T> h[G]; // Indice do time que joga em casa no G-esimo jogo
int<lower=0, upper=T> a[G]; // Indice do time que joga como visitante no G-esimo jogo
int<lower=0> y1[G]; // Numero de gols marcados no G-esimo jogo pelo time que joga em casa
int<lower=0> y2[G]; // Numero de gols marcados no G-esimo jogo pelo time que joga como visitante
}
parameters {
real home;
real mu;
real<upper=-0.25> mu_att1;
real<lower=0.25> mu_att3;
real<lower=0.25> mu_def1;
real<upper=-0.25> mu_def3;
real<lower=0> sigma_att[3];
real<lower=0> sigma_def[3];
vector[T] att;
vector[T] def;
simplex[3] pi_att;
simplex[3] pi_def;
}
model {
real m_att[3];
real m_def[3];
real aux;
mu ~ normal(0, 10);
home ~ normal(0, 10);
mu_att1 ~ normal(0, 10);
mu_att3 ~ normal(0, 10);
mu_def1 ~ normal(0, 10);
mu_def3 ~ normal(0, 10);
sigma_att ~ cauchy(0, 0.05);
sigma_def ~ cauchy(0, 0.05);
pi_att ~ dirichlet(rep_vector(0.001, 3));
pi_def ~ dirichlet(rep_vector(0.001, 3));
for (t in 1:T) {
m_att[1] = log(pi_att[1]) + normal_lpdf(att[t] | mu_att1, sigma_att[1]) -
normal_lcdf(-0.2 | mu_att1, sigma_att[1]);
m_att[2] = log(pi_att[2]) + normal_lpdf(att[t] | 0, 0.01) -
log_diff_exp(normal_lcdf(0.2 | 0, 0.01),
normal_lcdf(-0.2 | 0, 0.01));
m_att[3] = log(pi_att[3]) + normal_lpdf(att[t] | mu_att3, sigma_att[3]) -
normal_lccdf(0.2 | mu_att3, sigma_att[3]);
m_def[1] = log(pi_def[1]) + normal_lpdf(def[t] | mu_def1, sigma_def[1]) -
normal_lccdf(0.2 | mu_def1, sigma_def[1]);
m_def[2] = log(pi_def[2]) + normal_lpdf(def[t] | 0, 0.01) -
log_diff_exp(normal_lcdf(0.2 | 0, 0.01),
normal_lcdf(-0.2 | 0, 0.01));
m_def[3] = log(pi_def[3]) + normal_lpdf(def[t] | mu_def3, sigma_def[3]) -
normal_lcdf(-0.2 | mu_def3, sigma_def[3]);
target += log_sum_exp(m_att) + log_sum_exp(m_def);
}
for (g in 1:G) {
y1[g] ~ poisson_log(mu + home + att[h[g]] + def[a[g]]);
y2[g] ~ poisson_log(mu + att[a[g]] + def[h[g]]);
}
}
generated quantities {
vector[G] y1_tilde;
vector[G] y2_tilde;
vector[G] log_lik;
for (g in 1:G) {
y1_tilde[g] = poisson_log_rng(mu + home + att[h[g]] + def[a[g]]);
y2_tilde[g] = poisson_log_rng(mu + att[a[g]] + def[h[g]]);
log_lik[g] = poisson_log_lpmf(y1[g] | mu + home + att[h[g]] + def[a[g]]) + poisson_log_lpmf(y2[g] | mu + att[a[g]] + def[h[g]]);
}
}Analisando possíveis problemas
Traceplot
Visualizando os valores estimados para 100 campeonatos
